In [1]:
# move files into place that we need
# Create citation_scripts/__init__.py if it is missing; the import cell below
# does `from citation_scripts import api_utilities`, i.e. treats the folder as a package.
!touch citation_scripts/__init__.py
# Copy the DOI list into the working directory, because a later cell opens
# "apidois.json" by its bare file name.
!cp citation_scripts/apidois.json .

In [1]:
from citation_scripts import api_utilities
import json
import os
import sys
from IPython.display import clear_output

Get the list of DOIs from the provided JSON file (`apidois.json`):


In [2]:
# Parse the DOI list that was copied into the working directory.
with open("apidois.json", "r") as fh:
    dois = json.loads(fh.read())

# Keep the DOIs in sorted order so that each one has a reproducible position
# in the list for as long as the input file is unchanged.
dois = sorted(dois)

For each DOI, use the rich citations API to download the full citation data. Save it to a JSON file in the citation_data folder:


In [3]:
# Make sure the output folder exists, so this cell also works on a fresh
# checkout where nothing has been downloaded yet.
if not os.path.isdir("citation_data"):
    os.makedirs("citation_data")

for i, doi in enumerate(dois):
    # NOTE: the file name is derived from the DOI's position in `dois`, so the
    # files already on disk only line up with the list while it is unchanged.
    pth = "citation_data/doi_{:05d}.json".format(i)

    # skip DOIs that we have already fetched
    if os.path.exists(pth):
        continue

    # print out progress
    clear_output()
    print("{} --> {}".format(doi, pth))
    sys.stdout.flush()

    # download the citation
    citation = api_utilities.citations(doi)

    # Save it via a temporary file and rename only once the dump has finished.
    # Writing straight to `pth` would leave a truncated file behind if the cell
    # is interrupted mid-write, and the exists-check above would then skip that
    # DOI on every later run.
    tmp_pth = pth + ".tmp"
    with open(tmp_pth, "w") as fh:
        json.dump(citation, fh)
    os.rename(tmp_pth, pth)